home *** CD-ROM | disk | FTP | other *** search
- /*
- * munge.c -- Program to convert a text file into "munged" form,
- * suitable for reconstruction from printed form. Tabs are
- * made visible and checksums are added to each line and each
- * page to protect against transcription errors.
- *
- * Copyright (C) 1997 Pretty Good Privacy, Inc.
- *
- * Designed by Colin Plumb, Mark H. Weaver, and Philip R. Zimmermann
- * Written by Mark H. Weaver
- *
- * $Id: munge.c,v 1.18 1997/07/09 15:07:49 colin Exp $
- */
-
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "crc.h"
-
- /*
- * The file is divided into pages, and the format of each page is
- *
- --f414 000b2dc79af40010002 Page 1 of munge.c
-
- bc38e5 /*
- 40a838 * munge.c -- Program to convert a text file into munged form
- 647222 *
- 193f28 * Copyright (C) 1997 Pretty Good Privacy, Inc.
- 827222 *
- 699025 * Designed by Colin Plumb, Mark H. Weaver, and Philip R. Zimmermann
- 0d050c * Written by Mark H. Weaver
- *
- * Where the first 2 columns are the high 8 bits (in hex) of a running
- * CRC-32 of the page (the string "--", unlikely to be confused with
- * any digits, indicates a page header line) and the next 4 columns
- * are a CRC-16 of the rest of the line. Then a space (not counted in
- * the CRC), and the line of text. Tabs are printed as the currency
- * symbol (ISO Latin 1 character 164) followed by the appropriate number
- * of spaces, and any form feeds are printed as a yen symbol (Latin 1 165).
- * The CRC is computed on the transformed line, including the trailing
- * newline. No trailing whitespace is permitted.
- *
- * The header line contains a (hex) number of the form 0ffcccccccctpppnnnn,
- * where the digit 0 is a version number, ff are flags, cccccccc is the CRC-32
- * of the page, t is the tab size (usually 4 or 8; 0 for binary files that
- * are sent in radix-64), ppp is the product number (usually 1, different
- * for different books), and nnnn is the file number (sequential from 1).
- *
- * This is followed by " Page %u of " and the file name.
- */
-
- typedef struct MungeState
- {
- int binaryMode, tabWidth;
- long origLineNumber;
- long productNumber, fileNumber, pageNumber, lineNumber;
- unsigned long fileOffset;
- word32 runningCRC;
- char const * fileName;
- char const * fileNameTail;
- char * pageBuffer; /* Buffer large enough to hold one page */
- char * pagePos; /* Current position in pageBuffer */
- word16 hdrFlags;
- FILE * file;
- FILE * out;
- } MungeState;
-
- void ChecksumLine(char const *line, size_t length,
- char *prefix, word32 *runningCRC)
- {
- word16 lineCRC;
- byte runCRCPart = 0;
-
- lineCRC = CalculateCRC16(0, (byte const *)line, length);
- if (runningCRC != NULL)
- {
- *runningCRC = CalculateCRC32(*runningCRC, (byte const *)line, length);
- runCRCPart = (*runningCRC >> 24);
- }
- sprintf(prefix, (FMT8 FMT16), runCRCPart, lineCRC);
- prefix[6] = ' '; /* Write a space over the null byte */
- }
-
- /* Returns 1 for convenience */
- int PrintFileError(MungeState *state, char const *message)
- {
- fprintf(stderr, "%s in %s %s %lu\n", message, state->fileName,
- state->binaryMode ? "offset" : "line",
- state->binaryMode ? state->fileOffset : state->origLineNumber);
- return 1;
- }
-
- int MungeLine(MungeState *state, char *buffer, int length,
- char *line, int *bufferUsed)
- {
- int i, j;
- char ch;
-
- if (length < 1 || buffer[length - 1] != '\n')
- {
- buffer[length++] = '\n';
- buffer[length] = '\0';
- }
- /* return PrintFileError(state, "ERROR: Missing newline at end of file"); */
-
- i = 0;
- j = 0;
- for (i = 0; i < length && j <= LINE_LENGTH; i++)
- {
- ch = buffer[i];
- if (ch == '\t')
- {
- line[j++] = TAB_CHAR;
- if (state->tabWidth < 1)
- return PrintFileError(state,
- "ERROR: Tab found in radix64 stream");
- else
- while (j % state->tabWidth && j <= LINE_LENGTH)
- line[j++] = TAB_PAD_CHAR;
- }
- else if (ch == '\n')
- {
- if (++i < length)
- return PrintFileError(state,
- "UNEXPECTED ERROR: fgets read past newline!?");
- break;
- }
- else if (ch == '\f')
- {
- i++;
- line[j++] = FORMFEED_CHAR;
- break;
- }
- else if (ch >= ' ' && ch <= '~')
- line[j++] = ch;
- else
- return PrintFileError(state, "ERROR: Non-ASCII char");
- }
- /* Strip trailing spaces */
- while (j > 0 && isspace((unsigned char)line[j - 1]))
- j--;
-
- if (j > LINE_LENGTH)
- return PrintFileError(state, "ERROR: Line too long");
-
- /* Add trailing newline and NULL */
- line[j++] = '\n';
- line[j++] = '\0';
-
- /* Return number of chars used from buffer */
- *bufferUsed = i;
-
- return 0;
- }
-
- static void
- Encode3(byte const src[3], char dest[4])
- {
- dest[0] = radix64Digits[ (src[0]>>2 & 0x3f)];
- dest[1] = radix64Digits[(src[0]<<4 & 0x30) | (src[1]>>4 & 0x0f)];
- dest[2] = radix64Digits[(src[1]<<2 & 0x3c) | (src[2]>>6 & 0x03)];
- dest[3] = radix64Digits[(src[2] & 0x3f)];
- }
-
- static int
- EncodeLine(byte const *src, int srcLen, char *dest)
- {
- char * destp = dest;
- byte tempSrc[3];
-
- for (; srcLen >= 3; srcLen -= 3)
- {
- Encode3(src, destp);
- src += 3; destp += 4;
- }
-
- if (srcLen > 0)
- {
- memset(tempSrc, 0, sizeof(tempSrc));
- memcpy(tempSrc, src, srcLen);
- Encode3(src, destp);
- src += 3; destp += 4; srcLen -= 3;
- while (srcLen < 0)
- destp[srcLen++] = '=';
- }
-
- return destp - dest;
- }
-
- static int
- MungeBinaryLine(MungeState *state, byte const *buffer, int length, char *line)
- {
- char binLine[128];
- int binLength; /* Destination length */
- int used;
-
- binLength = EncodeLine(buffer, length, binLine);
-
- /* Append newline */
- binLine[binLength++] = '\n';
- binLine[binLength] = '\0';
-
- return MungeLine(state, binLine, binLength, line, &used);
- }
-
- int MaybePageBreak(MungeState *state)
- {
- if (state->lineNumber >= LINES_PER_PAGE)
- {
- char line[512];
- char * lineData = line + PREFIX_LENGTH;
-
- sprintf(lineData, "%01x%02x%08lx%01x%03lx%04lx Page %ld of %s\n",
- 0, /* 1: Format version 0 */
- state->hdrFlags, /* 2: Flags */
- state->runningCRC, /* 8: Running CRC32 */
- state->tabWidth, /* 1: Tab width (0 means radix64) */
- state->productNumber, /* 3: Product number (0 - 4095) */
- state->fileNumber, /* 4: File number (0 - 65535) */
- state->pageNumber + 1,
- state->fileNameTail);
-
- if (strlen(lineData) > LINE_LENGTH + 1)
- {
- PrintFileError(state, "ERROR: Header line too long");
- fprintf(stderr, "> %s", lineData);
- return -1;
- }
-
- /* Compute checksums and prefix them to line */
- ChecksumLine(lineData, strlen(lineData), line, NULL);
-
- fprintf(state->out, "--%s\n%s", line + 2, state->pageBuffer);
-
- state->pageNumber++;
- state->lineNumber = 0;
- state->runningCRC = 0;
- state->pagePos = state->pageBuffer; /* Clear page buffer */
- }
- return 0;
- }
-
- /*
- * Search for Emacs "tab-width: " maker in file.
- * Emacs is stricter about the format, but this will do.
- */
- int FindTabWidth(MungeState *state)
- {
- char const * const tabWidthMarker = " tab-width: ";
- char buffer[512];
- char * p;
- int length;
- int tabWidth = 0;
-
- fseek(state->file, -(sizeof(buffer) - 1), SEEK_END);
- length = fread(buffer, 1, sizeof(buffer) - 1, state->file);
- buffer[length] = '\0';
- p = strstr(buffer, tabWidthMarker);
- if (p != NULL)
- {
- p += strlen(tabWidthMarker);
- while (*p != '\0' && *p != '\n' && isspace(*p))
- p++;
- tabWidth = strtol(p, &p, 10);
- while (*p != '\0' && *p != '\n' && isspace(*p))
- p++;
- if (*p != '\n' || tabWidth < 2)
- tabWidth = 0;
- else if (tabWidth > 16)
- fprintf(stderr, "WARNING: Weird tab-width (%d), %s\n",
- tabWidth, state->fileName);
- }
- return tabWidth;
- }
-
- /*
- * Open the given source file and send the munged output to the
- * FILE *, with the given options.
- */
- int MungeFile(char const *fileName, FILE *out, int binaryMode,
- int defaultTabWidth, long productNumber, long fileNumber)
- {
- MungeState * state;
- int length, used;
- char line[PREFIX_LENGTH + LINE_LENGTH + 10];
- char * lineData = line + PREFIX_LENGTH;
- char buffer[128];
- int result = 0;
-
- state = (MungeState *)calloc(1, sizeof(*state));
- state->origLineNumber = 0;
- state->fileName = fileName;
- state->runningCRC = 0;
- state->productNumber = productNumber;
- state->fileNumber = fileNumber;
- state->pageNumber = 0;
- state->lineNumber = 0;
- state->fileOffset = 0;
- state->binaryMode = binaryMode;
- state->pageBuffer = malloc(PAGE_BUFFER_SIZE);
- state->pageBuffer[0] = '\0';
- state->pagePos = state->pageBuffer;
- state->hdrFlags = 0;
- state->out = out;
-
- state->fileNameTail = strrchr(state->fileName, '/');
- if (state->fileNameTail == NULL)
- state->fileNameTail = state->fileName;
- else
- state->fileNameTail++;
-
- state->file = fopen(state->fileName, binaryMode ? "rb" : "r");
- if (state->file == NULL)
- {
- result = errno;
- goto error;
- }
-
- if (state->binaryMode)
- {
- state->tabWidth = 0;
- }
- else
- {
- state->tabWidth = FindTabWidth(state);
- if (state->tabWidth == 0)
- state->tabWidth = defaultTabWidth;
- rewind(state->file);
- }
-
- while (!feof(state->file))
- {
- state->origLineNumber++;
-
- if (state->binaryMode)
- {
- length = fread(buffer, 1, BYTES_PER_LINE, state->file);
- if (length < 1)
- {
- if (feof(state->file))
- break;
- goto fileError;
- }
- if ((result = MaybePageBreak(state)))
- goto error;
- if ((result = MungeBinaryLine(state, buffer, length, lineData)))
- goto error;
- state->fileOffset += length;
- }
- else
- {
- if (fgets(buffer, sizeof(buffer), state->file) == NULL)
- {
- if (feof(state->file))
- break;
- goto fileError;
- }
- length = strlen(buffer);
- if ((result = MaybePageBreak(state)))
- goto error;
- if ((result = MungeLine(state, buffer, length, lineData, &used)))
- goto error;
-
- if (used < length)
- if (fseek(state->file, used - length, SEEK_CUR))
- goto fileError;
- }
-
- /* Compute checksums and prefix them to the line */
- ChecksumLine(lineData, strlen(lineData), line, &state->runningCRC);
-
- strcpy(state->pagePos, line);
- length = strlen(state->pagePos);
- /* Suppress trailing whitespace on blank lines */
- if (length == PREFIX_LENGTH+1 && state->pagePos[length-1] == '\n') {
- state->pagePos[--length-1] = '\n';
- state->pagePos[length] = '\0';
- }
- state->pagePos += length;
-
- state->lineNumber++;
- }
-
- if (state->lineNumber > 0)
- {
- /* Force a final page break */
- state->lineNumber = LINES_PER_PAGE;
- state->hdrFlags |= HDR_FLAG_LASTPAGE;
- if ((result = MaybePageBreak(state)))
- goto error;
- }
-
- result = 0;
- goto done;
-
- fileError:
- result = ferror(state->file);
-
- error:
- done:
- if (state != NULL)
- {
- if (state->file != NULL)
- fclose(state->file);
- free(state);
- }
- return result;
- }
-
/*
 * Usage: munge [-<digit>] [-b] [--] file...
 *
 * A digit option sets the default tab width, 'b' selects binary
 * (radix-64) mode, and "--" ends option processing.  Each remaining
 * argument is munged to stdout in turn; processing stops at the first
 * file that fails.
 *
 * Returns 0 on success, 1 on a bad option or a failed file.
 */
int main(int argc, char *argv[])
{
	int		result = 0;
	int		i, j;
	int		defaultTabWidth = 4;
	int		binaryMode = 0;
	long	fileNumber = 0;

	InitCRC();

	for (i = 1; i < argc && argv[i][0] == '-'; i++)
	{
		if (0 == strcmp(argv[i], "--"))
		{
			i++;
			break;
		}
		for (j = 1; argv[i][j] != '\0'; j++)
		{
			if (isdigit((unsigned char)argv[i][j]))
			{
				defaultTabWidth = argv[i][j] - '0';
				if (defaultTabWidth < 2 || defaultTabWidth > 9)
					fprintf(stderr, "WARNING: Weird default tab-width (%d)\n",
							defaultTabWidth);
			}
			else if (argv[i][j] == 'b')
			{
				binaryMode = 1;
			}
			else
			{
				fprintf(stderr, "ERROR: Unrecognized option -%c\n", argv[i][j]);
				return 1;
			}
		}
	}

	for (; i < argc; i++)
	{
		/* File numbers in the page headers are sequential from 1 */
		result = MungeFile(argv[i], stdout, binaryMode,
						   defaultTabWidth, 1, ++fileNumber);
		if (result != 0)
		{
			/*
			 * The result is not reliably an errno value, so it is not
			 * passed to strerror(); just name the file that failed.
			 */
			fprintf(stderr, "ERROR: Failed to munge %s\n", argv[i]);
			return 1;
		}
	}

	return 0;
}
-
- /*
- * Local Variables:
- * tab-width: 4
- * End:
- * vi: ts=4 sw=4
- * vim: si
- */
-
-